* 1. get data on health shocks with gender and ages
*
* Produces three tempfiles:
*   health_shocks - person-months with max_bnoSext==1, plus ferfi, kor and the
*                   per-person count of such months
*   list          - the (anon, t) pairs of health_shocks
*   people        - the distinct anon values of health_shocks
* On exit the data in memory is the `people' list.

tempfile health_shocks list people

* shock person-months only; ferfi and kor are taken from the same person-month in admin3_alap
use anon t max_bnoSext if max_bnoSext==1 using "$out/placebo_base_bnoSext_2023Dec.dta", clear
merge 1:1 anon t using "$in/admin3_alap.dta", keepusing(ferfi kor) keep(master match) nogenerate

* number of shock months per person
egen total_bnoSext_months = total(max_bnoSext), by(anon)
save `health_shocks'

* get a list of people-month pairs having any kind of shock that time
* (the data in memory is exactly what was just saved, so no reload is needed)
keep anon t
save `list'

* get a list of people affected with any type of shocks
keep anon
duplicates drop
save `people'

* 2. narrow down health shocks
*
* Take accidents and keep only those people whose accident months (at most 4)
* all fall inside the 4-month window that starts with their first accident month.
* Result: tempfile bnoS with one row per kept person (anon, t = first accident
* month, t_end = last accident month of the window, num_first = accident months
* in the window).

* start with people having any accident
use `health_shocks', clear
tsset anon t

* num: total accident months of the person
egen num=total(max_bnoSext), by(anon)
tab num

* first: the person's first accident month
egen first=min(t) if max_bnoSext==1, by(anon)

* num_first: on the first-accident row, accident months among t, t+1, t+2, t+3
* (the file only holds accident months, so a lead is missing unless that month is an accident)
gen num_first=1+(f.max_bnoSext==1)+(f2.max_bnoSext==1)+(f3.max_bnoSext==1) if first==t

* diagnostics
tab num_first if f4.max_bnoSext==1
tab num_first if num==num_first
tab num_first

* keep the person only when every accident month lies inside the window
egen byte tokeep=max(num==num_first), by(anon)
keep if tokeep==1
drop tokeep

* collapse the multiple subsequent shocks: t_end is the latest accident month within t..t+3
gen t_end=t+max(0, 1*(f.max_bnoSext==1), 2*(f2.max_bnoSext==1), 3*(f3.max_bnoSext==1)) if t==first
keep if t==first
keep anon t t_end num_first
tempfile bnoS
save `bnoS'


* 3. keep a subsample: 
	* people below 50
	* only those cases in which people are employed in a firm at some point after the shock
* NOTE(review): no age restriction is applied anywhere in this file - presumably it
* is imposed downstream; confirm.

* 3.1 prepare data and save resulting database before narrowing down the sample	
	
* add data on firms these people are attached to (main firm id: vallazon1)
use `people', clear
merge 1:m anon using "$in/admin3_alap.dta", keep(match) keepusing(t ev vallazon1) nogen
xtset anon t

* get first ever affiliation with a certain firm, focusing on vallazon1, but also
* checking whether the same firm shows up earlier among the supplementary firm links
preserve 
use `people', clear
merge 1:m anon using "$in/admin3_alap_kieg_long.dta", keep(match) keepusing(t ev vallazon id_sor) nogen
* FIX: the supplementary file stores the firm id in `vallazon'. Without this rename the
* appended rows have vallazon1==. and are ignored by the by(vallazon1 anon) minimum
* below, so the append had no effect on the first-affiliation month.
rename vallazon vallazon1
tempfile extra_id
save `extra_id'
restore
append using `extra_id'

* earliest month of each person-firm pair over main and supplementary rows together
egen mint=min(t) if vallazon1!=., by(vallazon1 anon)

* start==1 in the month a person-firm link is first observed
gen start=mint==t

* the supplementary rows (identified by a non-missing id_sor) were only needed for the minimum
drop if id_sor!=.
drop mint id_sor

* add the month of the shock(s) using the broad shock database:
* st (= t) and sev (= ev) are filled only on person-months found in the shock list
merge 1:1 anon t using `list', generate(in_list)
gen st=t if in_list==3
gen sev=ev if in_list==3
drop in_list

* save the start of new person-firm connections (person-months with start==1)
preserve
keep anon t start
keep if start==1
tempfile start_firm
save `start_firm'
restore
drop start

* technical part to define if the person is ever employed after a health shock in t(=st)
*
* Output: never_emp_again = 1 on a shock month (t==st) when the person has no later
* month with a non-missing vallazon1, 0 otherwise.

* reshape the data in a way that all health shock months are observed in every month for a person
preserve
keep anon st
keep if st!=.
egen c=seq(), by(anon)
* FIX: take the number of st# columns from the data instead of hard-coding 17, so the
* loops below neither fail (fewer shocks) nor silently skip shocks (more shocks)
summarize c, meanonly
local max_shocks = r(max)
reshape wide st, i(anon) j(c)
tempfile temp_st
save `temp_st'
restore

merge m:1 anon using `temp_st'
drop _merge

* generate variables showing if the person ever worked after a specific health shock that month
	* postt#_ : first month after shock # with a firm id (only on qualifying rows)
	* postt#  : the same value spread to all rows of the person (missing if none)
gen never_emp_again=0
forvalues n = 1/`max_shocks' {
	egen postt`n'_=min(t) if vallazon1!=. & t>st`n', by(anon)
	egen postt`n'=min(postt`n'_), by(anon)
	replace never_emp_again=1 if t==st`n' & postt`n'==.
}

* keep only the health shock months
keep if t==st

* drop variables not needed any more (st* also removes st itself; it is recreated below)
drop ev
drop st*
drop postt*

* add previous information on health shocks, age and gender (at the time of the shock)
merge 1:1 anon t using `health_shocks', nogenerate

* from here on the shock month is called st
rename t st

* quick checks before saving
codebook anon
summarize

save "$out/base_shock_data_w_postemp_info_bnoSext_2022Sept.dta", replace


* 3.2 narrow down the shocks to be considered
*
* Produces two tempfiles:
*   shocks - one row per kept shock with shock id (sid), month (st) and the
*            person-level variables carried so far
*   list   - anon, st, sid, t_end of the same shocks (left in memory)

use "$out/base_shock_data_w_postemp_info_bnoSext_2022Sept.dta" , clear

* drop those who have more complicated shock patterns than defined in 2.:
* only shock months matched to the collapsed accident file keep max_bnoSext==1
rename st t
merge 1:1 anon t using `bnoS', keep(master match)
replace max_bnoSext=0 if _merge!=3
drop _merge
rename num_first num_bnoS

* keep the retained accidents and drop those who are never employed again
keep if max_bnoSext==1 & never_emp_again!=1
drop never_emp_again vallazon1

* shock type, month, id, gender, age
rename t st
gen sid=_n
tempfile shocks
save `shocks'

* person id, shock id, month
keep anon st sid t_end
tempfile list
save `list'

* 4. add employment data for the previous and next 2 years

* one row per month from st-24 to t_end+24 for every shock:
* 49+(t_end-st) copies, numbered 1.. within sid, then shifted so that copy 25 is month st
expand 49+(t_end-st)
egen t=seq(), by(sid)
replace t=st+t-25
* NOTE(review): 180 is presumably the last month of the panel - confirm; months
* before the start of the panel are not dropped here and stay as unmatched rows
drop if t>180
merge m:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 fogvisz1 w1 wh1 id_tip wtip) keep(master match)
drop _merge

#delimit ;
label define lid_tip 
0 "no firm in t" 
1 "1 firm on 15th of t" 
2 "2 firms on 15th of t" 
3 ">2 firms on 15th of t" 
4 "1 firm on 15th of t + non15" 
5 "2 firms on 15th of t + non15" 
6 "only non15 firms in t", replace
;
#delimit cr
label values id_tip lid_tip		
* FIX: `replace' added for consistency with lid_tip above, so the definition does not
* fail if the merged data already carry a value label called lwtip
label define lwtip 0 "monthly" 1 "smoothed" 2 "mixed", replace
label values wtip lwtip 
label var id_tip "how many firm connections in t"
label var wtip "if wage smoothing"

* flag the months in which a person-firm connection is first observed (start==1)
merge m:1 anon t using `start_firm'
drop if _merge==2
drop _merge

* 5. Add number of days being on sick leave 

* first source: passziv_tip1 from admin3_passziv, stored as sick_leave_days
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match)
rename passziv_tip1 sick_leave_days
assert sick_leave_days!=. if _merge==3
drop _merge

* second source: tappenz_nap / tappenz_osszeg from admin3_pell_alap
merge 1:1 anon t using "$in/admin3_pell_alap.dta", keepusing(tappenz_nap tappenz_osszeg) keep(master match) nogen

* diagnostics comparing the two day counts from month 109 on
sum t if tappenz_nap!=.
sum sick_leave_days tappenz_nap if t>=109
sum sick_leave_days tappenz_nap if t>=109 & sick_leave_days!=0

* two-character pattern: first digit = sick_leave_days is non-zero and non-missing,
* second digit = the same for tappenz_nap
tempvar any_sickd any_tapn
gen byte `any_sickd'=sick_leave_days!=0 & sick_leave_days!=.
gen byte `any_tapn'=tappenz_nap!=0 & tappenz_nap!=.
gen sickd_tapn=string(`any_sickd')+string(`any_tapn')
drop `any_sickd' `any_tapn'
tab sickd_tapn if t>=109, m

* relative difference of the two day counts (difference over their mean)
gen pdiff=(sick_leave_days-tappenz_nap)/((sick_leave_days+tappenz_nap)/2) if t>=109
sum pdiff if sickd_tapn=="11", d
sum anon if pdiff==0 & sickd_tapn=="11"
drop pdiff sickd_tapn

* final names of the second-source variables
rename tappenz_nap sickl_days_1217
rename tappenz_osszeg sickness_ben1217

* 6. Add further information on health shocks 

* shock-level variables saved in 3.2 are copied onto every month of the shock's window
merge m:1 sid st anon using `shocks', nogenerate

* month of the year, 1-12 (a remainder of 0 is stored as 12)
gen month=cond(mod(t,12)==0, 12, mod(t,12))

* gender and age come from the shock month
rename ferfi male
rename kor age

* identifiers and timing
label variable anon "Person ID"
label variable st "Month of the shock"
label variable sev "Year of the shock"
label variable t "Month in admin3"
label variable month "Month of the year"

* main-firm employment variables
label variable vallazon1 "Firm ID - main firm"
label variable w1 "Monthly wage - main firm"
label variable wh1 "Monthly hours worked - main firm"
label variable fogvisz1 "Employment status (201 is employment contract) - main firm"
label variable start "First ever connection to firm"

* sick leave and demographics
label variable sick_leave_days "Number of days on sick leave, max 31"
label variable male "dummy for male"
label variable age "age at the time of the shock"

* 7. Add further data on monthly education (in month t and not in the month of the shock)

* build an (anon, t, educ) lookup from admin3_alap without disturbing the panel in memory
preserve
use anon t oh_jar oh_mrend using "$in/admin3_alap.dta", clear

* inspect the raw inputs
tab oh_jar
tab oh_mrend
tab oh_mrend, nolabel

* educ: 0 when oh_jar is missing (.), otherwise 2 when oh_mrend==0 and 1 in all other cases
gen educ=cond(oh_jar==., 0, cond(oh_mrend==0, 2, 1))
label define educ_lab 0 "not in education" 1 "full-time education" 2 "evening classes"
label values educ educ_lab
label variable  educ "Type of education in t (categories)"
tab educ

keep anon t educ
tempfile temp2
save `temp2'
restore

* attach the education category of month t
merge m:1 anon t using `temp2', keep(master match) nogenerate

* 8. additional drops:

* secondary-firm variables for month t
merge m:1 anon t using "$in/admin3_alap_identitas2.dta", keep(master match) keepusing(fogvisz2 vallazon2 w2 wh2) nogen
label variable vallazon2 "Firm ID - secondary firm"
label variable w2 "Monthly wage - secondary firm"
label variable wh2 "Monthly hours worked - secondary firm"
label variable fogvisz2 "Employment status (201 is employment contract) - secondary firm"

* how many rows have an employment contract at the main firm / only at the secondary firm
sum anon if fogvisz1==201
sum anon if fogvisz2==201 & fogvisz1!=201

* keep only if ever employed in my data (within +/- two years around the shock):
* a shock stays when at least one month of its window has status 201 at either firm
egen byte tokeep=max(fogvisz1==201 | fogvisz2==201), by(anon sid)
keep if tokeep==1
drop tokeep

drop sid

* add feor and transfer data
merge m:1 anon t using "$in/admin3_alap.dta", keepusing(feor1_h2 transfer_mtp) keep(master match) nogenerate

* 9. Save the data in separate files by shock type
*
* NOTE: the loop list currently contains only max_bnoSext, so every `else' branch and
* every "!=max_bnoSext" branch below is inactive. Those branches refer to sid_`X' and
* num_`X', which are not created anywhere in this file.
* CHANGE: the unconditional tempfile/save that used to follow `keep if' was removed -
* that file was never read in any branch, so it only cost a full-dataset write.

foreach X in max_bnoSext {
	* Y is the shock-type part of the output file name
	if "`X'"=="max_bnoSext"{
		local Y accidents_bnoSext
	}
	preserve
	* keep a specific shock
	keep if `X'==1
	* create / label shock id-s by person (only technical for bnoS) 
	if "`X'"=="max_bnoSext"{
		gen sid=1
	}
	else{
		rename sid_`X' sid
	}
	label var sid "ID of the shock by person"
	* number of same type shocks per person (# sid-s for narrow spending shocks)
	if "`X'"!="max_bnoSext"{
		rename num_`X' numshock
		label var numshock "number of same type shocks"
	}
	* drop the variables not needed (the shock-level flag; a month-level one is merged in below)
	drop max_bnoSext 
	* add the month of next shock of same type
	if "`X'"!="max_bnoSext"{
		tempfile temp
		save `temp'
		keep anon sid st
		duplicates drop
		* shifting sid down by one attaches each shock's month to the preceding shock
		replace sid=sid-1
		drop if sid==0
		rename st st_next
		merge 1:m anon sid using `temp'
		drop if _merge==1
		drop _merge
		label var st_next "Month of next shock of same type"
	}
	* add all the shocks in the 2+2-year period around the shock of interest (using the full data)
	merge m:1 anon t using `health_shocks', keepusing(max_bnoSext) keep(master match)
	drop _merge
	* months without a match have no shock
	foreach T in max_bnoSext{
		replace `T'=0 if `T'==.
	}
	label var max_bnoSext "person has accident in t"
	* create a variable showing the pattern of the combined health shock:
	* on the shock-month row, one character per month st..st+3 (1 = accident, 0 = none),
	* cut after the last accident; left empty when none of st+1..st+3 has an accident
	if "`X'"=="max_bnoSext"{
		xtset anon t
		gen clust="11" if st==t & f.max_bnoSext==1 & f2.max_bnoSext!=1 & f3.max_bnoSext!=1
		replace clust="101" if st==t & f.max_bnoSext!=1 & f2.max_bnoSext==1 & f3.max_bnoSext!=1
		replace clust="1001" if st==t & f.max_bnoSext!=1 & f2.max_bnoSext!=1 & f3.max_bnoSext==1
		replace clust="111" if st==t & f.max_bnoSext==1 & f2.max_bnoSext==1 & f3.max_bnoSext!=1
		replace clust="1011" if st==t & f.max_bnoSext!=1 & f2.max_bnoSext==1 & f3.max_bnoSext==1
		replace clust="1101" if st==t & f.max_bnoSext==1 & f2.max_bnoSext!=1 & f3.max_bnoSext==1
		replace clust="1111" if st==t & f.max_bnoSext==1 & f2.max_bnoSext==1 & f3.max_bnoSext==1
		* spread the pattern from the shock-month row to every row of the person as `cluster'
		tempfile clust
		save `clust'
		keep anon clust
		duplicates drop
		keep if clust!=""
		rename clust cluster
		merge 1:m anon using `clust'
		drop clust _merge
	}
	* create the event month
	gen m=t-st
	if "`X'"=="max_bnoSext"{
		* months st..t_end all count as event month 0: positive event months are
		* shifted down by one for each month the shock spans beyond st
		replace m=m-1 if m>0 & t_end-st>=1
		replace m=m-1 if m>0 & t_end-st>=2
		replace m=m-1 if m>0 & t_end-st>=3
		assert m>=-24 & m<=24
	}
	else{
		drop if m>24
	}
	drop t_end
	label var m "event month"
	if "`X'"=="max_bnoSext"{
		* sid is the constant 1 for this shock type, so it is not kept
		cap drop sid
		order anon st sev m t month sick_leave_days vallazon1 vallazon2 w1 w2 wh1 wh2 fogvisz1 fogvisz2 male age educ
		sort anon t
	}
	else{
		order anon sid st sev m t month sick_leave_days vallazon1 vallazon2 w1 w2 wh1 wh2 fogvisz1 fogvisz2 male age educ
		sort anon sid t
	}
	sum
	save "$out/emp_info_around_`Y'_2022Sept.dta", replace
	restore
}	

clear
